#Alexander f. gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Prepare treatment definition for anthracite falsification tests.

#Load packages
library(tidyverse)
library(tidylog)
library(data.table)
library(here)

#load data
cbp <- fread(here("data", "input", "cbp_naics", "efsy_panel_naics.csv"))
#update fips codes to the correct ones
#the recodes use data.table's := so that cbp is modified by reference;
#the previous cbp[cond,]$fipscty <- value form copied the table on every
#recode, and integer literals keep the county code column's type unchanged
cbp[fipstate == 46 & fipscty == 113 & year == 2016, fipscty := 102L]
cbp[fipstate == 12 & fipscty == 25 & year == 2000, fipscty := 86L]
#from wikipedia: Note that Columbus is not an independent city as the number suggests.
#It is a consolidated city-county with Muscogee County, incorporating everything outside of Fort Benning
#recode 13510 to 13215 in every year
cbp[fipstate == 13 & fipscty == 510, fipscty := 215L]

#Genevieve County in Missouri changed its FIPS code from 29193 to 29186 in 1979
#rows of the panel that still carry the old code:
#302 29193  1976
#303 29193  1980
#only the 1980 row is recoded here
cbp[fipstate == 29 & fipscty == 193 & year == 1980, fipscty := 186L]

#304 46131  1980
#South Dakota, 1983: Washabaugh county (FIPS 46131) merges into Jackson county (FIPS 46071).
#[The Census Bureau states that this change was effective as of 1979, but there are datasets that still use this county code in the early 1980s]

#305 51695  1976
#Nansemond city (former FIPS code 51695) merged with Suffolk city

#306 51710  2016
#Norfolk city is an independent city (county-equivalent) in Virginia

#subset data to NAICS code 212113 (the anthracite industry used for the falsification tests)
cbp_sub <- subset(cbp, naics12 %in% c("212113"))
#create categorical variable by industry type
cbp_sub$industry <- "antcoal"
#aggregate employment by county, year and industry
#NOTE(review): unlike the 2019-2020 aggregation further down, sum() is called
#without na.rm = TRUE, so a missing emp value makes the whole county-year NA;
#confirm that emp has no missing values in the panel
cbp_sub <- cbp_sub %>%
  group_by(fipstate, fipscty, industry, year) %>%
  summarise(emp = sum(emp)) %>%
  #summarise() only drops the last grouping variable; remove the remaining
  #grouping so it does not carry over into later steps and the saved file
  ungroup()
#add 2019 and 2020 data
#each county file is read separately and tagged with its year
cbp19 <- fread(here("data", "input", "cbp", "cbp19co.txt"))
cbp19$year <- 2019
cbp20 <- fread(here("data", "input", "cbp", "cbp20co.txt"))
cbp20$year <- 2020
#stack the two years together
cbpnew <- bind_rows(cbp19, cbp20)
#remove the "-" characters from the naics codes, then keep code 212113
cbpnew_sub <- cbpnew %>%
  mutate(naics = gsub("-", "", naics)) %>%
  filter(naics %in% c("212113"))
#create categorical variable by industry type
cbpnew_sub$industry <- "antcoal"
#aggregate employment by county, year and industry, ignoring missing emp values
cbpnew_sub <- cbpnew_sub %>%
  group_by(fipstate, fipscty, year, industry) %>%
  summarise(emp = sum(emp, na.rm = TRUE)) %>%
  #summarise() only drops the last grouping variable; remove the remaining
  #grouping so later steps work on a plain, ungrouped table
  ungroup()
#bind the panel years and the 2019-2020 years together
#both inputs are ungrouped first: bind_rows() keeps the grouping of its first
#input, so a grouping left over from summarise() would otherwise end up in
#the saved file
cbp_out <- bind_rows(ungroup(cbp_sub), ungroup(cbpnew_sub))
#pad county fips codes to three digits to prepare to concatenate them with the state fips codes
cbp_out$fipscty <- stringr::str_pad(cbp_out$fipscty, 3, "left", "0")
#state code followed by the padded county code, stored as a number
cbp_out$fips <- as.numeric(paste0(cbp_out$fipstate, cbp_out$fipscty))
#subset to the essential variables
cbp_out <- subset(cbp_out, select = -c(fipstate, fipscty))
#remove invalid fips codes (rows whose last three fips digits are 999)
cbp_out <- subset(cbp_out, !grepl("999", str_sub(fips, -3, -1)))
#save file
saveRDS(cbp_out, here("data", "inter", "cbp_ant.rds"))
#stop redirecting to a log file, but only when a diversion is actually open;
#this script never opens one itself, and a bare sink() with nothing to close
#raises a "no sink to remove" warning
if (sink.number() > 0) {
  sink()
}